On andromeda
Download protein file from genome
# Fetch the Pocillopora acuta (HIv2) predicted-protein FASTA into the
# project references directory and decompress it in place.
cd /data/putnamlab/zdellaert/Pdam-TagSeq/references
wget http://cyanophora.rutgers.edu/Pocillopora_acuta/Pocillopora_acuta_HIv2.genes.pep.faa.gz
gunzip Pocillopora_acuta_HIv2.genes.pep.faa.gz
# Create the BLAST working directory next to references/.
# -p keeps this step from failing if the directory already exists.
cd ..
mkdir -p blast
cd blast
On personal computer
# Copy the biomineralization toolkit FASTA from the local machine into the blast/ directory on the cluster.
scp /Users/zoedellaert/Documents/URI/Heron-Pdam-gene-expression/BioInf/data/Biomineralization_Toolkit_FScucchia/Biomineralization_Toolkit_FScucchia.fasta zdellaert@ssh3.hac.uri.edu:/data/putnamlab/zdellaert/Pdam-TagSeq/blast/
Biomineralization_Toolkit_FScucchia.fasta
On andromeda
nano Biomineralization_blast.sh
#!/bin/bash
#SBATCH --job-name="Pacuta_TRP_blast"
#SBATCH -t 240:00:00
#SBATCH --export=NONE
#SBATCH --mail-type=BEGIN,END,FAIL #email you when job starts, stops and/or fails
#SBATCH --mail-user=zdellaert@uri.edu #your email to send notifications
#SBATCH --mem=100GB
#SBATCH --error="blast_out_error"
#SBATCH --output="blast_out"
#SBATCH --account=putnamlab
#SBATCH -D /data/putnamlab/zdellaert/Pdam-TagSeq/blast/
#SBATCH --nodes=1 --ntasks-per-node=20

# BLAST the biomineralization toolkit proteins against the Pocillopora acuta
# predicted proteome. Runs from the blast/ directory set with -D above.
module load BLAST+/2.9.0-iimpi-2019b

# Build a protein BLAST database (Pacuta_prot.*) from the reference proteome.
makeblastdb -in ../references/Pocillopora_acuta_HIv2.genes.pep.faa -out Pacuta_prot -dbtype prot

# -num_threads matches the 20 tasks requested above; without it blastp runs
# single-threaded and the reserved cores sit idle. Threading does not change
# the hits that are reported.
#
# NOTE(review): no -evalue cutoff is given, so BLAST's permissive default
# applies and weak alignments can be reported as hits; consider an explicit
# threshold if only confident homologs should be kept.

# Full pairwise report (outfmt 0) for manual inspection of the alignments.
blastp -query Biomineralization_Toolkit_FScucchia.fasta -db Pacuta_prot -out Biomineralization_blast_results.txt -outfmt 0 -num_threads 20

# Tabular report (outfmt 6) limited to one target sequence per query.
# NOTE(review): blastp warns that examining 5 or more matches is recommended
# when -max_target_seqs 1 is used; the downstream R code assumes one subject
# per query, so raising this value requires selecting the top hit there.
blastp -query Biomineralization_Toolkit_FScucchia.fasta -db Pacuta_prot -out Biomineralization_blast_results_tab.txt -outfmt 6 -max_target_seqs 1 -num_threads 20
sbatch Biomineralization_blast.sh
Errors:
Warning: [blastp] Examining 5 or more matches is recommended FASTA-Reader: Ignoring invalid residues at position(s): On line 2741: 378, 383, 386-390, 401, 417, 420-422, 431, 437-439, 443, 459-461 Warning: [blastp] Query_168 Gene: g13552, N.. : One or more O characters replaced by X for alignment score calculations at positions 382, 390, 392, 422
On personal computer:
# Copy both BLAST reports (pairwise and tabular) from the cluster to the local output directory.
scp zdellaert@ssh3.hac.uri.edu:/data/putnamlab/zdellaert/Pdam-TagSeq/blast/Biomineralization_blast_results.txt /Users/zoedellaert/Documents/URI/Heron-Pdam-gene-expression/BioInf/output
scp zdellaert@ssh3.hac.uri.edu:/data/putnamlab/zdellaert/Pdam-TagSeq/blast/Biomineralization_blast_results_tab.txt /Users/zoedellaert/Documents/URI/Heron-Pdam-gene-expression/BioInf/output
Now, will take the best Pacuta alignment for each Biomineralization Gene and match to the name of that gene and make the dataframe into a format to match with differentially expressed/frontloaded genes or modules.
sessionInfo() #provides list of loaded packages and version of R.
## R version 4.3.0 (2023-04-21)
## Platform: aarch64-apple-darwin20 (64-bit)
## Running under: macOS Ventura 13.0
##
## Matrix products: default
## BLAS: /Library/Frameworks/R.framework/Versions/4.3-arm64/Resources/lib/libRblas.0.dylib
## LAPACK: /Library/Frameworks/R.framework/Versions/4.3-arm64/Resources/lib/libRlapack.dylib; LAPACK version 3.11.0
##
## locale:
## [1] en_US.UTF-8/en_US.UTF-8/en_US.UTF-8/C/en_US.UTF-8/en_US.UTF-8
##
## time zone: America/New_York
## tzcode source: internal
##
## attached base packages:
## [1] stats graphics grDevices utils datasets methods base
##
## loaded via a namespace (and not attached):
## [1] digest_0.6.33 R6_2.5.1 fastmap_1.1.1 xfun_0.39
## [5] cachem_1.0.8 knitr_1.42 htmltools_0.5.5 rmarkdown_2.21
## [9] cli_3.6.1 sass_0.4.6 jquerylib_0.1.4 compiler_4.3.0
## [13] rstudioapi_0.15.0 tools_4.3.0 evaluate_0.21 bslib_0.4.2
## [17] yaml_2.3.7 rlang_1.1.1 jsonlite_1.8.7
library(dplyr)
##
## Attaching package: 'dplyr'
## The following objects are masked from 'package:stats':
##
## filter, lag
## The following objects are masked from 'package:base':
##
## intersect, setdiff, setequal, union
library(readxl)
# Load the biomineralization toolkit table and drop the Stylophora BLAST
# column before joining.
Biomin_genes <- read_excel("~/Documents/URI/Heron-Pdam-gene-expression/BioInf/data/Biomineralization_Toolkit_FScucchia/Biomineralization_Toolkit_FScucchia.xlsx")
Biomin_genes <- Biomin_genes %>%
  select(-`blasted protein in Stylophora`)

# The tabular BLAST report has no header row, so read.delim names the columns
# V1, V2, ...; in outfmt 6 the first two columns are the query ID and the
# subject (Pocillopora acuta) ID.
Biomin_blast_results <- read.delim("~/Documents/URI/Heron-Pdam-gene-expression/BioInf/output/Biomineralization_blast_results_tab.txt", header = FALSE)

# Keep only the query/subject pair and collapse repeated rows for the same pair.
Biomin_blast_results <- Biomin_blast_results %>%
  select(V1, V2) %>%
  distinct()

# Merge data frames based on accessionnumber/geneID; toolkit entries without a
# BLAST hit are dropped by the inner join.
merged_data <- Biomin_genes %>%
  inner_join(Biomin_blast_results, by = c("accessionnumber/geneID" = "V1")) %>%
  rename("Pocillopora_acuta_best_hit" = "V2")

# Use FALSE rather than F: F is an ordinary variable that can be reassigned.
write.csv(merged_data, "~/Documents/URI/Heron-Pdam-gene-expression/BioInf/output/Biomin_blast_Pocillopora_acuta_best_hit.csv", row.names = FALSE)
How many of our 9011 genes are represented in the Biomineralization genes?
# NOTE: despite its name, this table is not restricted to differentially
# expressed genes. It holds every gene in the dataset together with the
# p-value and fold-change columns used to decide which genes are significant
# DEGs under the glmmSeq model.
DEGs <- read.csv("../../../output/Slope_Base/signif_genes_normcts.csv", sep = ",", header = TRUE)
# Drop the column named X.
DEGs <- dplyr::select(DEGs, !c("X"))
# Index rows by gene ID.
rownames(DEGs) <- DEGs[["Gene"]]
dim(DEGs)
## [1] 9011 75
# Keep the expression-table rows whose gene ID is the best BLAST hit of at
# least one toolkit protein; a gene hit by several toolkit proteins appears
# once per protein. This reuses (and replaces) the name `Biomin_genes`.
Biomin_genes <- inner_join(
  DEGs,
  merged_data,
  by = c("Gene" = "Pocillopora_acuta_best_hit")
)
# Print the toolkit definitions of the matched entries.
Biomin_genes[["definition"]]
## [1] "Mucin4-like protein"
## [2] "Sushi domain-containing"
## [3] "Mucin-4 [Stylophora pistillata]"
## [4] "mammalian ependymin-related protein 1-like [Stylophora pistillata]"
## [5] "uncharacterized protein LOC111337489 [Stylophora pistillata]"
## [6] "Viral inclusion protein"
## [7] "Annotated: Actin"
## [8] "plasma membrane calcium ATPase [Stylophora pistillata]"
## [9] "Hephaestin-like protein"
## [10] "hephaestin-like protein [Stylophora pistillata]"
## [11] "Annotated: Vitellogenin"
## [12] "clone g15888 vitellogenin-like protein gene"
## [13] "clone g1441 vitellogenin-like protein gene"
## [14] "vitellogenin-like [Stylophora pistillata]"
## [15] "Zona pellucida domain-containing protein"
## [16] "Annotated: Zona Pellucida (ZP domain-containing)"
## [17] "Acropora millepora clone B26 hypothetical protein p251_4"
## [18] "Zona pellucida"
## [19] "ZP domain-containing protein-like [Stylophora pistillata]"
## [20] "solute carrier family 4 member gamma [Stylophora pistillata]"
## [21] "Sacsin [Stylophora pistillata]"
## [22] "Complement C3 [Stylophora pistillata]"
## [23] "uncharacterized protein LOC111323869 [Stylophora pistillata]"
## [24] "uncharacterized protein LOC111345150 [Stylophora pistillata]"
## [25] "Major yolk protein"
## [26] "major yolk protein-like isoform X2 [Stylophora pistillata]"
## [27] "SAARP3"
## [28] "Acidic SOMP (Full-Length p27)"
## [29] "Acidic skeletal organic matrix protein (Acidic SOMP)"
## [30] "CARP1 [Stylophora pistillata]"
## [31] "Annotated: CARP1"
## [32] "Uncharacterized skeletal organic matrix protein-3 (USOMP-3)"
## [33] "Collagen alpha-1 chain"
## [34] "Annotated: Tolloid-Like"
## [35] "CUB domain-containing protein-like isoform X2 [Stylophora pistillata]"
## [36] "Protocadherin-like"
## [37] "chymotrypsin-like elastase family member 1 [Stylophora pistillata]"
## [38] "Cephalotoxin-like protein"
## [39] "microtubule-associated tumor suppressor 1 homolog isoform X1 [Stylophora pistillata]"
## [40] "microtubule-associated tumor suppressor 1 homolog isoform X2 [Stylophora pistillata]"
## [41] "sodium bicarbonate cotransporter 3-like isoform X2"
## [42] "Poly [ADP-ribose] polymerase 11 [Stylophora pistillata]"
## [43] "carbonic anhydrase [Stylophora pistillata]"
## [44] "carbonic anhydrase 2"
## [45] "Annotated: Carbonic Anhydrase (STPCA2-1)"
## [46] "Annotated: CarbonicAnhyrase"
## [47] "Annotated: N/A, named it CARP6-partial"
## [48] "Annotated: USOMPS13"
## [49] "Stylophora pistillata clone g11702 hypothetical protein gene"
## [50] "Annotated: Kielin-Like"
## [51] "Kielin/chordin like"
## [52] "thioredoxin reductase 1, cytoplasmic-like [Stylophora pistillata]"
## [53] "Flagellar associated protein"
## [54] "protein lingerer-like [Stylophora pistillata]"
## [55] "CUB and peptidase domain-containing protein 2-like [Stylophora pistillata]"
## [56] "Protein FAM208A [Stylophora pistillata]"
## [57] "spore wall protein 2-like isoform X3 [Stylophora pistillata]"
## [58] "L-type calcium channel alpha-1 subunit"
## [59] "Annotated: Fibronectin"
## [60] "Annotated: Fibronectin (Fibronectin-2)"
## [61] "Annotated: carbonic anhydrase (STPCA2-2)"
## [62] "Stylophora pistillata clone g19762 hypothetical protein gene"
## [63] "CARP3 [Stylophora pistillata]"
## [64] "galaxin2"
## [65] "galaxin"
## [66] "Galaxin 2"
## [67] "galaxin-like isoform X2 [Stylophora pistillata]"
## [68] "Annotated: Protoacadherin (PC4)"
## [69] "Annotated: Protocadherin (PC2)"
## [70] "Annotated: Protocadherin (PC3)"
## [71] "Annotated: Protocadherin (PC3)"
## [72] "Annotated: Cadherin"
## [73] "Annotated: Protocadherin (PC1)"
## [74] "Annotated: Protoacadherin (PC4)"
## [75] "Protocadherin fat-like"
## [76] "MAM and LDLr domain-containing protein"
## [77] "MAM and LDLr domain-containing protein"
## [78] "Annotated: MAM and LDL receptor-containing protein (MAM LDL-2)"
## [79] "MAM and LDL-receptor domain- containing protein 2"
## [80] "MAM and LDL-receptor domain- containing protein 1"
## [81] "MAM domain anchor protein"
## [82] "MAM/LDL receptor domain containing protein"
## [83] "Zonadhesion-like precursor"
## [84] "MAM and LDL-receptor class A domain-containing protein 2-like [Stylophora pistillata]"
## [85] "band 3 anion transport protein-like"
## [86] "LOW QUALITY PROTEIN: uncharacterized protein LOC111321626 [Stylophora pistillata]"
## [87] "MAGUK p55 subfamily member 7-like [Stylophora pistillata]"
## [88] "uncharacterized protein LOC111344812 [Stylophora pistillata]"
## [89] "SLIT-ROBO Rho GTPase-activating protein 1-like [Stylophora pistillata]"
## [90] "Late embryogenesis protein"
## [91] "EGF and laminin G domain-containing protein"
## [92] "EGF and laminin G domain-containing protein"
## [93] "Laminin G domain-containing protein"
## [94] "EGF and laminin G domain-containing protein"
## [95] "Annotated: EGF and LamininG-Like (EGF LamG2)"
## [96] "Annotated: EGF and LamininG-Like (EGF LamG1)"
## [97] "EGF and laminin G domain-containing protein"
## [98] "Contactin-associated protein"
## [99] "Neurexin"
## [100] "EGF and laminin G domain-containing protein-like [Stylophora pistillata]"
## [101] "Annotated: Protocadherin (PC5)"
## [102] "Protocadherin"
## [103] "endothelin-converting enzyme 1-like isoform X2 [Stylophora pistillata]"
## [104] "PHD finger protein 21A-like [Stylophora pistillata]"
## [105] "low-density lipoprotein receptor-related protein 8-like [Stylophora pistillata]"
## [106] "Acropora yongei Na+/Ca2+ exchanger"
## [107] "TSP-1 and VWA domain-containing"
## [108] "Annotated: Thrombospondin-like protein (Thrombospondin)"
## [109] "Annotated: Coadhesin"
## [110] "clone g9951 alpha collagen-like protein gene"
## [111] "Thrombospondin"
## [112] "Hemicentin"
## [113] "coadhesin-like isoform X3 [Stylophora pistillata]"
## [114] "Uncharacterized skeletal organic matrix protein-6 (USOMP6)"
## [115] "Integrin - alpha"
## [116] "hypothetical protein AWC38_SpisGene4292 [Stylophora pistillata]"
## [117] "von Willebrand factor D and EGF domain-containing protein-like, partial [Stylophora pistillata]"
## [118] "collagenase 3-like [Stylophora pistillata]"
## [119] "digestive cysteine proteinase 1-like [Stylophora pistillata]"
## [120] "Cystein-rich"
## [121] "Uncharacterized skeletal organic matrix protein-2 (USOMP-2)"
## [122] "polycystic kidney disease 1-related (PKD1-related) protein"
## [123] "polycystic kidney disease 1-related (PKD1-related) protein"
## [124] "Adi-SAARP2"
## [125] "Skeletal acidic Asp-rich Protein 2 (SAARP2)"
## [126] "CARP9"
## [127] "skeletal aspartic acid-rich protein 2-like (CARP5)"
# Number of rows in the joined table, i.e. toolkit entries whose best hit is in the expression table.
length(Biomin_genes$definition)
## [1] 127
# Distinct Pocillopora acuta gene IDs among those rows (several toolkit entries can share one best hit).
Biomin_genes_names <- unique(Biomin_genes$Gene)
length(Biomin_genes_names)
## [1] 65
# Side-by-side view of each matched gene, the toolkit accession that hit it, its definition, and the source reference.
Biomin_genes %>% select(Gene, `accessionnumber/geneID`, definition, Ref)
## Gene
## 1 Pocillopora_acuta_HIv2___RNAseq.g13823.t1
## 2 Pocillopora_acuta_HIv2___RNAseq.g13823.t1
## 3 Pocillopora_acuta_HIv2___RNAseq.g13823.t1
## 4 Pocillopora_acuta_HIv2___RNAseq.g25351.t1
## 5 Pocillopora_acuta_HIv2___RNAseq.g7085.t1
## 6 Pocillopora_acuta_HIv2___RNAseq.g22851.t1
## 7 Pocillopora_acuta_HIv2___RNAseq.g14505.t1
## 8 Pocillopora_acuta_HIv2___RNAseq.g27976.t1
## 9 Pocillopora_acuta_HIv2___RNAseq.g27566.t1
## 10 Pocillopora_acuta_HIv2___RNAseq.g27566.t1
## 11 Pocillopora_acuta_HIv2___TS.g13222.t1b
## 12 Pocillopora_acuta_HIv2___TS.g13222.t1b
## 13 Pocillopora_acuta_HIv2___TS.g13222.t1b
## 14 Pocillopora_acuta_HIv2___TS.g13222.t1b
## 15 Pocillopora_acuta_HIv2___TS.g2710.t1
## 16 Pocillopora_acuta_HIv2___TS.g2710.t1
## 17 Pocillopora_acuta_HIv2___TS.g2710.t1
## 18 Pocillopora_acuta_HIv2___TS.g2710.t1
## 19 Pocillopora_acuta_HIv2___TS.g2710.t1
## 20 Pocillopora_acuta_HIv2___RNAseq.g15280.t1
## 21 Pocillopora_acuta_HIv2___RNAseq.g25214.t1
## 22 Pocillopora_acuta_HIv2___RNAseq.g8821.t1
## 23 Pocillopora_acuta_HIv2___RNAseq.g21232.t1
## 24 Pocillopora_acuta_HIv2___RNAseq.g20587.t2
## 25 Pocillopora_acuta_HIv2___RNAseq.g14653.t1
## 26 Pocillopora_acuta_HIv2___RNAseq.g14653.t1
## 27 Pocillopora_acuta_HIv2___RNAseq.g13172.t1
## 28 Pocillopora_acuta_HIv2___RNAseq.g13172.t1
## 29 Pocillopora_acuta_HIv2___RNAseq.g13172.t1
## 30 Pocillopora_acuta_HIv2___RNAseq.g16280.t1
## 31 Pocillopora_acuta_HIv2___RNAseq.g16280.t1
## 32 Pocillopora_acuta_HIv2___TS.g23724.t1a
## 33 Pocillopora_acuta_HIv2___TS.g1359.t1
## 34 Pocillopora_acuta_HIv2___RNAseq.g26037.t1
## 35 Pocillopora_acuta_HIv2___RNAseq.g26035.t1
## 36 Pocillopora_acuta_HIv2___RNAseq.g3235.t1
## 37 Pocillopora_acuta_HIv2___RNAseq.g19288.t1
## 38 Pocillopora_acuta_HIv2___RNAseq.g5013.t1
## 39 Pocillopora_acuta_HIv2___TS.g11659.t1
## 40 Pocillopora_acuta_HIv2___TS.g11659.t1
## 41 Pocillopora_acuta_HIv2___RNAseq.g7402.t1
## 42 Pocillopora_acuta_HIv2___RNAseq.g14663.t1a
## 43 Pocillopora_acuta_HIv2___TS.g12304.t1
## 44 Pocillopora_acuta_HIv2___TS.g12304.t1
## 45 Pocillopora_acuta_HIv2___TS.g12304.t1
## 46 Pocillopora_acuta_HIv2___TS.g12304.t1
## 47 Pocillopora_acuta_HIv2___TS.g5112.t1
## 48 Pocillopora_acuta_HIv2___TS.g26810.t1
## 49 Pocillopora_acuta_HIv2___TS.g26810.t1
## 50 Pocillopora_acuta_HIv2___RNAseq.g3780.t1
## 51 Pocillopora_acuta_HIv2___RNAseq.g3780.t1
## 52 Pocillopora_acuta_HIv2___RNAseq.g10093.t2
## 53 Pocillopora_acuta_HIv2___RNAseq.g11609.t1
## 54 Pocillopora_acuta_HIv2___RNAseq.g7908.t1
## 55 Pocillopora_acuta_HIv2___RNAseq.g21338.t1
## 56 Pocillopora_acuta_HIv2___RNAseq.g26846.t1a
## 57 Pocillopora_acuta_HIv2___RNAseq.g5807.t1
## 58 Pocillopora_acuta_HIv2___RNAseq.g21501.t1
## 59 Pocillopora_acuta_HIv2___RNAseq.g21517.t1
## 60 Pocillopora_acuta_HIv2___RNAseq.g21517.t1
## 61 Pocillopora_acuta_HIv2___RNAseq.g13824.t1
## 62 Pocillopora_acuta_HIv2___TS.g425.t1
## 63 Pocillopora_acuta_HIv2___RNAseq.g30304.t2
## 64 Pocillopora_acuta_HIv2___RNAseq.g30304.t2
## 65 Pocillopora_acuta_HIv2___RNAseq.g30304.t2
## 66 Pocillopora_acuta_HIv2___RNAseq.g30304.t2
## 67 Pocillopora_acuta_HIv2___RNAseq.g30304.t2
## 68 Pocillopora_acuta_HIv2___TS.g6583.t1
## 69 Pocillopora_acuta_HIv2___TS.g6583.t1
## 70 Pocillopora_acuta_HIv2___TS.g6583.t1
## 71 Pocillopora_acuta_HIv2___TS.g6583.t1
## 72 Pocillopora_acuta_HIv2___TS.g6583.t1
## 73 Pocillopora_acuta_HIv2___TS.g6583.t1
## 74 Pocillopora_acuta_HIv2___TS.g6583.t1
## 75 Pocillopora_acuta_HIv2___TS.g6583.t1
## 76 Pocillopora_acuta_HIv2___RNAseq.g25935.t1
## 77 Pocillopora_acuta_HIv2___RNAseq.g25935.t1
## 78 Pocillopora_acuta_HIv2___RNAseq.g25935.t1
## 79 Pocillopora_acuta_HIv2___RNAseq.g25935.t1
## 80 Pocillopora_acuta_HIv2___RNAseq.g25935.t1
## 81 Pocillopora_acuta_HIv2___RNAseq.g25935.t1
## 82 Pocillopora_acuta_HIv2___RNAseq.g25935.t1
## 83 Pocillopora_acuta_HIv2___RNAseq.g25935.t1
## 84 Pocillopora_acuta_HIv2___RNAseq.g25935.t1
## 85 Pocillopora_acuta_HIv2___TS.g27873.t1
## 86 Pocillopora_acuta_HIv2___RNAseq.g7668.t1
## 87 Pocillopora_acuta_HIv2___RNAseq.g15517.t1
## 88 Pocillopora_acuta_HIv2___RNAseq.g24861.t1b
## 89 Pocillopora_acuta_HIv2___RNAseq.g27376.t1
## 90 Pocillopora_acuta_HIv2___RNAseq.g16715.t1
## 91 Pocillopora_acuta_HIv2___RNAseq.g26221.t1
## 92 Pocillopora_acuta_HIv2___RNAseq.g26221.t1
## 93 Pocillopora_acuta_HIv2___RNAseq.g26221.t1
## 94 Pocillopora_acuta_HIv2___RNAseq.g26221.t1
## 95 Pocillopora_acuta_HIv2___RNAseq.g26221.t1
## 96 Pocillopora_acuta_HIv2___RNAseq.g26221.t1
## 97 Pocillopora_acuta_HIv2___RNAseq.g26221.t1
## 98 Pocillopora_acuta_HIv2___RNAseq.g26221.t1
## 99 Pocillopora_acuta_HIv2___RNAseq.g26221.t1
## 100 Pocillopora_acuta_HIv2___RNAseq.g26221.t1
## 101 Pocillopora_acuta_HIv2___RNAseq.g22388.t1
## 102 Pocillopora_acuta_HIv2___RNAseq.g22388.t1
## 103 Pocillopora_acuta_HIv2___RNAseq.g19211.t1
## 104 Pocillopora_acuta_HIv2___RNAseq.g1634.t1
## 105 Pocillopora_acuta_HIv2___RNAseq.g4085.t1
## 106 Pocillopora_acuta_HIv2___RNAseq.g24639.t1
## 107 Pocillopora_acuta_HIv2___RNAseq.g6446.t1
## 108 Pocillopora_acuta_HIv2___RNAseq.g6446.t1
## 109 Pocillopora_acuta_HIv2___RNAseq.g6446.t1
## 110 Pocillopora_acuta_HIv2___RNAseq.g6446.t1
## 111 Pocillopora_acuta_HIv2___RNAseq.g6446.t1
## 112 Pocillopora_acuta_HIv2___RNAseq.g6446.t1
## 113 Pocillopora_acuta_HIv2___RNAseq.g6446.t1
## 114 Pocillopora_acuta_HIv2___TS.g22622.t1
## 115 Pocillopora_acuta_HIv2___TS.g15792.t1
## 116 Pocillopora_acuta_HIv2___TS.g15792.t1
## 117 Pocillopora_acuta_HIv2___RNAseq.g28226.t2
## 118 Pocillopora_acuta_HIv2___TS.g5338.t1
## 119 Pocillopora_acuta_HIv2___RNAseq.g18103.t1
## 120 Pocillopora_acuta_HIv2___TS.g1545.t1b
## 121 Pocillopora_acuta_HIv2___TS.g1545.t1b
## 122 Pocillopora_acuta_HIv2___RNAseq.g16433.t1
## 123 Pocillopora_acuta_HIv2___RNAseq.g16433.t1
## 124 Pocillopora_acuta_HIv2___RNAseq.g22261.t1
## 125 Pocillopora_acuta_HIv2___RNAseq.g22261.t1
## 126 Pocillopora_acuta_HIv2___RNAseq.g22261.t1
## 127 Pocillopora_acuta_HIv2___RNAseq.g22261.t1
## accessionnumber/geneID
## 1 aug_v2a.09809.t1
## 2 P13_g6918
## 3 PFX18785.1
## 4 XP_022794351.1
## 5 XP_022799541.1
## 6 P4_g9861
## 7 Gene:g9094
## 8 AAR13013.1
## 9 aug_v2a.24015.t1
## 10 XP_022788227.1
## 11 Gene:g15294.t1
## 12 P24_g15888
## 13 P26_g1441
## 14 XP_022779720.1
## 15 aug_v2a.07627.t1
## 16 Gene:g907
## 17 JN631095.1
## 18 P21_g18277
## 19 XP_022806326.1
## 20 AJQ31790.1
## 21 PFX13778.1
## 22 PFX26597.1
## 23 XP_022783044.1
## 24 XP_022808163.1
## 25 P8_g9654
## 26 XP_022786918.1
## 27 aug_v2a.06327.t1
## 28 Gene:g13552
## 29 JR972076.1
## 30 AGE35225.2
## 31 Gene:g1484
## 32 JR997000.1
## 33 JR991083.1
## 34 Gene:g5735.t1
## 35 XP_022799089.1
## 36 aug_v2a.19518.t1
## 37 XP_022788730.1
## 38 JR986059.1
## 39 XP_022809269.1
## 40 XP_022809270.1
## 41 XP_022801463.1
## 42 PFX27832.1
## 43 ACE95141.1
## 44 EU532164.1
## 45 Gene:g29033.t1
## 46 Gene:g29034.t1
## 47 Gene:g8396
## 48 Gene:g30385.t1
## 49 P16_g11702
## 50 Gene:g39770
## 51 P32_g5540
## 52 XP_022804785.1
## 53 P33_g8985
## 54 XP_022806664.1
## 55 XP_022780694.1
## 56 PFX15740.1
## 57 XP_022803872.1
## 58 AAD11470.1
## 59 Gene:g22569
## 60 Gene:g37058
## 61 Gene:g27814
## 62 P22_g19762
## 63 AGE35226.1
## 64 aug_v2a.15065.t1
## 65 aug_v2a.18631.t1
## 66 JR976690.1
## 67 XP_022794122.1
## 68 AGG36361.1
## 69 Gene:10186
## 70 Gene:g10187
## 71 Gene:g10188
## 72 Gene:g2115
## 73 Gene:g2116
## 74 Gene:g30
## 75 P9_g10811;P1_g11108;P10_g11107
## 76 aug_v2a.09968.t1
## 77 aug_v2a.09969.t1
## 78 Gene:g15955
## 79 JR994474.1
## 80 JT011118.1
## 81 P20_g6066
## 82 P34_g1714
## 83 P36_g13890
## 84 XP_022794736.1
## 85 XP_022788270.1
## 86 XP_022780303.1
## 87 XP_022789932.1
## 88 XP_022807807.1
## 89 XP_022806928.1
## 90 P28_g11651
## 91 aug_v2a.06122.t1
## 92 aug_v2a.06123.t1
## 93 aug_v2a.15580.t1
## 94 aug_v2a.24512.t1
## 95 Gene:g34749
## 96 Gene:g7086
## 97 JR980881.1
## 98 P19_g20041
## 99 P31_g20420
## 100 XP_022804012.1
## 101 Gene:g24177
## 102 P23_g1057
## 103 XP_022789591.1
## 104 XP_022790441.1
## 105 XP_022798902.1
## 106 MG182344.1
## 107 aug_v2a.05945.t1
## 108 Gene:g2829
## 109 Gene:g2829.t1
## 110 P14_g9951
## 111 P3_g12510
## 112 P5_g11674
## 113 XP_022783415.1
## 114 JR971508.1
## 115 P27_g18472
## 116 PFX30903.1
## 117 XP_022810585.1
## 118 XP_022783952.1
## 119 XP_022803524.1
## 120 aug_v2a.15064.t1
## 121 JR982706.1
## 122 aug_v2a.02830
## 123 aug_v2a.02830.t1
## 124 aug_v2a.01440.t1(aug_v2a.01441.t1)
## 125 JR991407.1
## 126 P15_g1532
## 127 XP_022780690.1
## definition
## 1 Mucin4-like protein
## 2 Sushi domain-containing
## 3 Mucin-4 [Stylophora pistillata]
## 4 mammalian ependymin-related protein 1-like [Stylophora pistillata]
## 5 uncharacterized protein LOC111337489 [Stylophora pistillata]
## 6 Viral inclusion protein
## 7 Annotated: Actin
## 8 plasma membrane calcium ATPase [Stylophora pistillata]
## 9 Hephaestin-like protein
## 10 hephaestin-like protein [Stylophora pistillata]
## 11 Annotated: Vitellogenin
## 12 clone g15888 vitellogenin-like protein gene
## 13 clone g1441 vitellogenin-like protein gene
## 14 vitellogenin-like [Stylophora pistillata]
## 15 Zona pellucida domain-containing protein
## 16 Annotated: Zona Pellucida (ZP domain-containing)
## 17 Acropora millepora clone B26 hypothetical protein p251_4
## 18 Zona pellucida
## 19 ZP domain-containing protein-like [Stylophora pistillata]
## 20 solute carrier family 4 member gamma [Stylophora pistillata]
## 21 Sacsin [Stylophora pistillata]
## 22 Complement C3 [Stylophora pistillata]
## 23 uncharacterized protein LOC111323869 [Stylophora pistillata]
## 24 uncharacterized protein LOC111345150 [Stylophora pistillata]
## 25 Major yolk protein
## 26 major yolk protein-like isoform X2 [Stylophora pistillata]
## 27 SAARP3
## 28 Acidic SOMP (Full-Length p27)
## 29 Acidic skeletal organic matrix protein (Acidic SOMP)
## 30 CARP1 [Stylophora pistillata]
## 31 Annotated: CARP1
## 32 Uncharacterized skeletal organic matrix protein-3 (USOMP-3)
## 33 Collagen alpha-1 chain
## 34 Annotated: Tolloid-Like
## 35 CUB domain-containing protein-like isoform X2 [Stylophora pistillata]
## 36 Protocadherin-like
## 37 chymotrypsin-like elastase family member 1 [Stylophora pistillata]
## 38 Cephalotoxin-like protein
## 39 microtubule-associated tumor suppressor 1 homolog isoform X1 [Stylophora pistillata]
## 40 microtubule-associated tumor suppressor 1 homolog isoform X2 [Stylophora pistillata]
## 41 sodium bicarbonate cotransporter 3-like isoform X2
## 42 Poly [ADP-ribose] polymerase 11 [Stylophora pistillata]
## 43 carbonic anhydrase [Stylophora pistillata]
## 44 carbonic anhydrase 2
## 45 Annotated: Carbonic Anhydrase (STPCA2-1)
## 46 Annotated: CarbonicAnhyrase
## 47 Annotated: N/A, named it CARP6-partial
## 48 Annotated: USOMPS13
## 49 Stylophora pistillata clone g11702 hypothetical protein gene
## 50 Annotated: Kielin-Like
## 51 Kielin/chordin like
## 52 thioredoxin reductase 1, cytoplasmic-like [Stylophora pistillata]
## 53 Flagellar associated protein
## 54 protein lingerer-like [Stylophora pistillata]
## 55 CUB and peptidase domain-containing protein 2-like [Stylophora pistillata]
## 56 Protein FAM208A [Stylophora pistillata]
## 57 spore wall protein 2-like isoform X3 [Stylophora pistillata]
## 58 L-type calcium channel alpha-1 subunit
## 59 Annotated: Fibronectin
## 60 Annotated: Fibronectin (Fibronectin-2)
## 61 Annotated: carbonic anhydrase (STPCA2-2)
## 62 Stylophora pistillata clone g19762 hypothetical protein gene
## 63 CARP3 [Stylophora pistillata]
## 64 galaxin2
## 65 galaxin
## 66 Galaxin 2
## 67 galaxin-like isoform X2 [Stylophora pistillata]
## 68 Annotated: Protoacadherin (PC4)
## 69 Annotated: Protocadherin (PC2)
## 70 Annotated: Protocadherin (PC3)
## 71 Annotated: Protocadherin (PC3)
## 72 Annotated: Cadherin
## 73 Annotated: Protocadherin (PC1)
## 74 Annotated: Protoacadherin (PC4)
## 75 Protocadherin fat-like
## 76 MAM and LDLr domain-containing protein
## 77 MAM and LDLr domain-containing protein
## 78 Annotated: MAM and LDL receptor-containing protein (MAM LDL-2)
## 79 MAM and LDL-receptor domain- containing protein 2
## 80 MAM and LDL-receptor domain- containing protein 1
## 81 MAM domain anchor protein
## 82 MAM/LDL receptor domain containing protein
## 83 Zonadhesion-like precursor
## 84 MAM and LDL-receptor class A domain-containing protein 2-like [Stylophora pistillata]
## 85 band 3 anion transport protein-like
## 86 LOW QUALITY PROTEIN: uncharacterized protein LOC111321626 [Stylophora pistillata]
## 87 MAGUK p55 subfamily member 7-like [Stylophora pistillata]
## 88 uncharacterized protein LOC111344812 [Stylophora pistillata]
## 89 SLIT-ROBO Rho GTPase-activating protein 1-like [Stylophora pistillata]
## 90 Late embryogenesis protein
## 91 EGF and laminin G domain-containing protein
## 92 EGF and laminin G domain-containing protein
## 93 Laminin G domain-containing protein
## 94 EGF and laminin G domain-containing protein
## 95 Annotated: EGF and LamininG-Like (EGF LamG2)
## 96 Annotated: EGF and LamininG-Like (EGF LamG1)
## 97 EGF and laminin G domain-containing protein
## 98 Contactin-associated protein
## 99 Neurexin
## 100 EGF and laminin G domain-containing protein-like [Stylophora pistillata]
## 101 Annotated: Protocadherin (PC5)
## 102 Protocadherin
## 103 endothelin-converting enzyme 1-like isoform X2 [Stylophora pistillata]
## 104 PHD finger protein 21A-like [Stylophora pistillata]
## 105 low-density lipoprotein receptor-related protein 8-like [Stylophora pistillata]
## 106 Acropora yongei Na+/Ca2+ exchanger
## 107 TSP-1 and VWA domain-containing
## 108 Annotated: Thrombospondin-like protein (Thrombospondin)
## 109 Annotated: Coadhesin
## 110 clone g9951 alpha collagen-like protein gene
## 111 Thrombospondin
## 112 Hemicentin
## 113 coadhesin-like isoform X3 [Stylophora pistillata]
## 114 Uncharacterized skeletal organic matrix protein-6 (USOMP6)
## 115 Integrin - alpha
## 116 hypothetical protein AWC38_SpisGene4292 [Stylophora pistillata]
## 117 von Willebrand factor D and EGF domain-containing protein-like, partial [Stylophora pistillata]
## 118 collagenase 3-like [Stylophora pistillata]
## 119 digestive cysteine proteinase 1-like [Stylophora pistillata]
## 120 Cystein-rich
## 121 Uncharacterized skeletal organic matrix protein-2 (USOMP-2)
## 122 polycystic kidney disease 1-related (PKD1-related) protein
## 123 polycystic kidney disease 1-related (PKD1-related) protein
## 124 Adi-SAARP2
## 125 Skeletal acidic Asp-rich Protein 2 (SAARP2)
## 126 CARP9
## 127 skeletal aspartic acid-rich protein 2-like (CARP5)
## Ref
## 1 Takeuchi et al., 2016
## 2 Drake et al., 2013
## 3 Peled et al., 2020
## 4 Peled et al., 2020
## 5 Peled et al., 2020
## 6 Drake et al., 2013
## 7 Mummadisetti et al., 2021
## 8 Zoccola et al., 2004
## 9 Takeuchi et al., 2016
## 10 Peled et al., 2020
## 11 Mummadisetti et al., 2021
## 12 Drake et al., 2013
## 13 Drake et al., 2013
## 14 Peled et al., 2020
## 15 Takeuchi et al., 2016
## 16 Mummadisetti et al., 2021
## 17 Hayward et al., 2011
## 18 Drake et al., 2013
## 19 Peled et al., 2020
## 20 Zoccola et al., 2015
## 21 Peled et al., 2020
## 22 Peled et al., 2020
## 23 Peled et al., 2020
## 24 Peled et al., 2020
## 25 Drake et al., 2013
## 26 Peled et al., 2020
## 27 Takeuchi et al., 2016
## 28 Mummadisetti et al., 2021
## 29 Ramos-Silva et al., 2013
## 30 Mass et al., 2013
## 31 Mummadisetti et al., 2021
## 32 Ramos-Silva et al., 2013
## 33 Ramos-Silva et al., 2013
## 34 Mummadisetti et al., 2021
## 35 Peled et al., 2020
## 36 Takeuchi et al., 2016
## 37 Peled et al., 2020
## 38 Ramos-Silva et al., 2013
## 39 Peled et al., 2020
## 40 Peled et al., 2020
## 41 Zoccola et al., 2015
## 42 Peled et al., 2020
## 43 Moya et al., 2008
## 44 Bertucci et al., 2011
## 45 Mummadisetti et al., 2021
## 46 Mummadisetti et al., 2021
## 47 Mummadisetti et al., 2021
## 48 Mummadisetti et al., 2021
## 49 Drake et al., 2013
## 50 Mummadisetti et al., 2021
## 51 Drake et al., 2013
## 52 Peled et al., 2020
## 53 Drake et al., 2013
## 54 Peled et al., 2020
## 55 Peled et al., 2020
## 56 Peled et al., 2020
## 57 Peled et al., 2020
## 58 Zoccola et al., 1999
## 59 Mummadisetti et al., 2021
## 60 Mummadisetti et al., 2021
## 61 Mummadisetti et al., 2021
## 62 Drake et al., 2013
## 63 Mass et al., 2013
## 64 Takeuchi et al., 2016
## 65 Takeuchi et al., 2016
## 66 Ramos-Silva et al., 2013
## 67 Peled et al., 2020
## 68 Drake et al., 2013
## 69 Mummadisetti et al., 2021
## 70 Mummadisetti et al., 2021
## 71 Mummadisetti et al., 2021
## 72 Mummadisetti et al., 2021
## 73 Mummadisetti et al., 2021
## 74 Mummadisetti et al., 2021
## 75 Drake et al., 2013
## 76 Takeuchi et al., 2016
## 77 Takeuchi et al., 2016
## 78 Mummadisetti et al., 2021
## 79 Ramos-Silva et al., 2013
## 80 Ramos-Silva et al., 2013
## 81 Drake et al., 2013
## 82 Drake et al., 2013
## 83 Drake et al., 2013
## 84 Peled et al., 2020
## 85 Zoccola et al., 2015
## 86 Peled et al., 2020
## 87 Peled et al., 2020
## 88 Peled et al., 2020
## 89 Peled et al., 2020
## 90 Drake et al., 2013
## 91 Takeuchi et al., 2016
## 92 Takeuchi et al., 2016
## 93 Takeuchi et al., 2016
## 94 Takeuchi et al., 2016
## 95 Mummadisetti et al., 2021
## 96 Mummadisetti et al., 2021
## 97 Ramos-Silva et al., 2013
## 98 Drake et al., 2013
## 99 Drake et al., 2013
## 100 Peled et al., 2020
## 101 Mummadisetti et al., 2021
## 102 Drake et al., 2013
## 103 Peled et al., 2020
## 104 Peled et al., 2020
## 105 Peled et al., 2020
## 106 Barron et al., 2018
## 107 Takeuchi et al., 2016
## 108 Mummadisetti et al., 2021
## 109 Mummadisetti et al., 2021
## 110 Drake et al., 2013
## 111 Drake et al., 2013
## 112 Drake et al., 2013
## 113 Peled et al., 2020
## 114 Ramos-Silva et al., 2013
## 115 Drake et al., 2013
## 116 Peled et al., 2020
## 117 Peled et al., 2020
## 118 Peled et al., 2020
## 119 Peled et al., 2020
## 120 Takeuchi et al., 2016
## 121 Ramos-Silva et al., 2013
## 122 Takeuchi et al., 2016
## 123 Takeuchi et al., 2016
## 124 Takeuchi et al., 2016
## 125 Ramos-Silva et al., 2013
## 126 Drake et al., 2013
## 127 Peled et al., 2020
#Biomin_genes %>% select(Gene, `accessionnumber/geneID`, definition, Ref, Origin, Treatment, Treatment.Origin) %>% View()
127/172 of the Biomineralization Genes are represented in our dataset of 9011 genes; they match 65 distinct Pocillopora genes (65/9011), because some Pocillopora genes match multiple Biomineralization Genes
Differentially expressed genes: are any of these Biomineralization genes?
# Build one table per model term, keeping the genes whose value in that term's
# column of DEGs is below 0.05, and report the size of each table.
Origin_DEGs <- dplyr::filter(DEGs, Origin < 0.05)
nrow(Origin_DEGs)
## [1] 840
Treatment_DEGs <- dplyr::filter(DEGs, Treatment < 0.05)
nrow(Treatment_DEGs)
## [1] 18
Interaction_DEGs <- dplyr::filter(DEGs, Treatment.Origin < 0.05)
nrow(Interaction_DEGs)
## [1] 30
Setting up for plotting genes: loading in the results from glmmSeq
library(glmmSeq)
## Warning in checkMatrixPackageVersion(): Package version inconsistency detected.
## TMB was built with Matrix version 1.5.4
## Current Matrix version is 1.6.0
## Please re-install 'TMB' from source using install.packages('TMB', type = 'source') or ask CRAN for a binary version of 'TMB' matching CRAN's 'Matrix' package
## Warning in checkDepPackageVersion(dep_pkg = "TMB"): Package version inconsistency detected.
## glmmTMB was built with TMB version 1.9.3
## Current TMB version is 1.9.4
## Please re-install glmmTMB from source or restore original 'TMB' package (see '?reinstalling' for more information)
# Reload the RDS saved by the previous step / previous iteration and pass it
# straight through glmmQvals(), which also prints the per-term
# "Not Significant / Significant" tables.
results <- glmmQvals(readRDS(file = "glmmSeq.rds"))
##
## Treatment
## ---------
## Not Significant Significant
## 8993 18
##
## Origin
## ------
## Not Significant Significant
## 8171 840
##
## Treatment:Origin
## ----------------
## Not Significant Significant
## 8981 30
# Run the project's plotting script (presumably defines Factor_ggmodelPlot(),
# which the plotting loops call) and set the two palettes passed to it.
source(file = "../Factor_ggmodelPlot.R")
plotColours <- c("skyblue", "mediumseagreen")
modColours <- c("dodgerblue3", "seagreen4")

# Keep the Origin DEGs whose Gene appears as Pocillopora_acuta_best_hit in
# merged_data; a gene matched by several entries yields one row per entry.
Biomin_Origin_DEGs <- inner_join(
  Origin_DEGs,
  merged_data,
  by = c("Gene" = "Pocillopora_acuta_best_hit")
)
Biomin_Origin_DEGs[["definition"]]
## [1] "mammalian ependymin-related protein 1-like [Stylophora pistillata]"
## [2] "Annotated: Vitellogenin"
## [3] "clone g15888 vitellogenin-like protein gene"
## [4] "clone g1441 vitellogenin-like protein gene"
## [5] "vitellogenin-like [Stylophora pistillata]"
## [6] "uncharacterized protein LOC111323869 [Stylophora pistillata]"
## [7] "uncharacterized protein LOC111345150 [Stylophora pistillata]"
## [8] "Cephalotoxin-like protein"
## [9] "carbonic anhydrase [Stylophora pistillata]"
## [10] "carbonic anhydrase 2"
## [11] "Annotated: Carbonic Anhydrase (STPCA2-1)"
## [12] "Annotated: CarbonicAnhyrase"
## [13] "thioredoxin reductase 1, cytoplasmic-like [Stylophora pistillata]"
## [14] "protein lingerer-like [Stylophora pistillata]"
## [15] "Annotated: carbonic anhydrase (STPCA2-2)"
## [16] "Late embryogenesis protein"
# Number of joined rows, i.e. Biomineralization entries matched by Origin DEGs.
length(Biomin_Origin_DEGs$definition)
## [1] 16
# Distinct Pocillopora genes behind those rows; this vector drives the
# per-gene plotting loop.
Biomin_Origin_DEG_names <- unique(Biomin_Origin_DEGs$Gene)
length(Biomin_Origin_DEG_names)
## [1] 10
# select() is namespace-qualified, matching the dplyr::select / dplyr::filter
# calls used elsewhere in this file, so a select() exported by another
# attached package cannot mask it.
Biomin_Origin_DEGs %>%
  dplyr::select(Gene, `accessionnumber/geneID`, definition, Ref)
## Gene accessionnumber/geneID
## 1 Pocillopora_acuta_HIv2___RNAseq.g25351.t1 XP_022794351.1
## 2 Pocillopora_acuta_HIv2___TS.g13222.t1b Gene:g15294.t1
## 3 Pocillopora_acuta_HIv2___TS.g13222.t1b P24_g15888
## 4 Pocillopora_acuta_HIv2___TS.g13222.t1b P26_g1441
## 5 Pocillopora_acuta_HIv2___TS.g13222.t1b XP_022779720.1
## 6 Pocillopora_acuta_HIv2___RNAseq.g21232.t1 XP_022783044.1
## 7 Pocillopora_acuta_HIv2___RNAseq.g20587.t2 XP_022808163.1
## 8 Pocillopora_acuta_HIv2___RNAseq.g5013.t1 JR986059.1
## 9 Pocillopora_acuta_HIv2___TS.g12304.t1 ACE95141.1
## 10 Pocillopora_acuta_HIv2___TS.g12304.t1 EU532164.1
## 11 Pocillopora_acuta_HIv2___TS.g12304.t1 Gene:g29033.t1
## 12 Pocillopora_acuta_HIv2___TS.g12304.t1 Gene:g29034.t1
## 13 Pocillopora_acuta_HIv2___RNAseq.g10093.t2 XP_022804785.1
## 14 Pocillopora_acuta_HIv2___RNAseq.g7908.t1 XP_022806664.1
## 15 Pocillopora_acuta_HIv2___RNAseq.g13824.t1 Gene:g27814
## 16 Pocillopora_acuta_HIv2___RNAseq.g16715.t1 P28_g11651
## definition
## 1 mammalian ependymin-related protein 1-like [Stylophora pistillata]
## 2 Annotated: Vitellogenin
## 3 clone g15888 vitellogenin-like protein gene
## 4 clone g1441 vitellogenin-like protein gene
## 5 vitellogenin-like [Stylophora pistillata]
## 6 uncharacterized protein LOC111323869 [Stylophora pistillata]
## 7 uncharacterized protein LOC111345150 [Stylophora pistillata]
## 8 Cephalotoxin-like protein
## 9 carbonic anhydrase [Stylophora pistillata]
## 10 carbonic anhydrase 2
## 11 Annotated: Carbonic Anhydrase (STPCA2-1)
## 12 Annotated: CarbonicAnhyrase
## 13 thioredoxin reductase 1, cytoplasmic-like [Stylophora pistillata]
## 14 protein lingerer-like [Stylophora pistillata]
## 15 Annotated: carbonic anhydrase (STPCA2-2)
## 16 Late embryogenesis protein
## Ref
## 1 Peled et al., 2020
## 2 Mummadisetti et al., 2021
## 3 Drake et al., 2013
## 4 Drake et al., 2013
## 5 Peled et al., 2020
## 6 Peled et al., 2020
## 7 Peled et al., 2020
## 8 Ramos-Silva et al., 2013
## 9 Moya et al., 2008
## 10 Bertucci et al., 2011
## 11 Mummadisetti et al., 2021
## 12 Mummadisetti et al., 2021
## 13 Peled et al., 2020
## 14 Peled et al., 2020
## 15 Mummadisetti et al., 2021
## 16 Drake et al., 2013
16/172 of the Biomineralization Genes are represented in the Origin DEGs. These correspond to 10 Pocillopora genes (some of the 10 match multiple Biomineralization Genes) out of the 65 that match Biomineralization Genes (10/65).
Pocillopora_acuta_HIv2___TS.g13222.t1b is a best match for:
- Gene:g15294.t1 Annotated: Vitellogenin
- P24_g15888 clone g15888 vitellogenin-like protein gene
- P26_g1441 clone g1441 vitellogenin-like protein gene
- XP_022779720.1 vitellogenin-like [Stylophora pistillata]
Pocillopora_acuta_HIv2___TS.g12304.t1 is a best match for:
- ACE95141.1 carbonic anhydrase [Stylophora pistillata]
- EU532164.1 carbonic anhydrase 2
- Gene:g29033.t1 Annotated: Carbonic Anhydrase (STPCA2-1)
- Gene:g29034.t1 Annotated: CarbonicAnhyrase
# Draw one Factor_ggmodelPlot() figure per unique Origin DEG that matched a
# Biomineralization gene. The explicit print() is needed because values are
# not auto-printed inside a for loop.
for (i in Biomin_Origin_DEG_names) {
  print(Factor_ggmodelPlot(
    results,
    geneName = i,
    x1var = "Treatment",
    x2var = "Origin",
    addBox = TRUE, # TRUE rather than T: T is an ordinary, reassignable variable
    xlab = "Treatment and Origin",
    title = i,
    colours = plotColours,
    lineColours = plotColours,
    modelColours = modColours,
    modelSize = 3
  ))
}
## Warning: `aes_string()` was deprecated in ggplot2 3.0.0.
## ℹ Please use tidy evaluation idioms with `aes()`.
## ℹ See also `vignette("ggplot2-in-packages")` for more information.
## This warning is displayed once every 8 hours.
## Call `lifecycle::last_lifecycle_warnings()` to see where this warning was
## generated.
## Warning: Using `size` aesthetic for lines was deprecated in ggplot2 3.4.0.
## ℹ Please use `linewidth` instead.
## This warning is displayed once every 8 hours.
## Call `lifecycle::last_lifecycle_warnings()` to see where this warning was
## generated.
# Treatment DEGs whose Gene appears as Pocillopora_acuta_best_hit in
# merged_data.
Biomin_Treatment_DEGs <- inner_join(
  Treatment_DEGs,
  merged_data,
  by = c("Gene" = "Pocillopora_acuta_best_hit")
)
Biomin_Treatment_DEGs[["definition"]]
## character(0)
0/172 of the Biomineralization Genes are represented in the Treatment DEGs
# Treatment:Origin interaction DEGs whose Gene appears as
# Pocillopora_acuta_best_hit in merged_data.
Biomin_Interaction_DEGs <- inner_join(
  Interaction_DEGs,
  merged_data,
  by = c("Gene" = "Pocillopora_acuta_best_hit")
)
Biomin_Interaction_DEGs[["definition"]]
## character(0)
0/172 of the Biomineralization Genes are represented in the Interaction DEGs
Frontloaded genes!
# Read the frontloaded-gene list and drop its `X` column (presumably the row
# index that was saved along with the CSV -- confirm against the step that
# wrote the file).
FRONTs <- dplyr::select(
  read.csv(
    file = "../../../output/Slope_Base/frontloaded_genes.csv",
    sep = ",",
    header = TRUE
  ),
  !c("X")
)

# Frontloaded genes whose Gene appears as Pocillopora_acuta_best_hit in
# merged_data; one row per matching merged_data entry.
Biomin_FRONTs <- inner_join(
  FRONTs,
  merged_data,
  by = c("Gene" = "Pocillopora_acuta_best_hit")
)
Biomin_FRONTs[["definition"]]
## [1] "Mucin4-like protein"
## [2] "Sushi domain-containing"
## [3] "Mucin-4 [Stylophora pistillata]"
## [4] "plasma membrane calcium ATPase [Stylophora pistillata]"
## [5] "Hephaestin-like protein"
## [6] "hephaestin-like protein [Stylophora pistillata]"
## [7] "solute carrier family 4 member gamma [Stylophora pistillata]"
## [8] "Complement C3 [Stylophora pistillata]"
## [9] "Major yolk protein"
## [10] "major yolk protein-like isoform X2 [Stylophora pistillata]"
## [11] "CARP1 [Stylophora pistillata]"
## [12] "Annotated: CARP1"
## [13] "Uncharacterized skeletal organic matrix protein-3 (USOMP-3)"
## [14] "Annotated: Tolloid-Like"
## [15] "Cephalotoxin-like protein"
## [16] "microtubule-associated tumor suppressor 1 homolog isoform X1 [Stylophora pistillata]"
## [17] "microtubule-associated tumor suppressor 1 homolog isoform X2 [Stylophora pistillata]"
## [18] "sodium bicarbonate cotransporter 3-like isoform X2"
## [19] "carbonic anhydrase [Stylophora pistillata]"
## [20] "carbonic anhydrase 2"
## [21] "Annotated: Carbonic Anhydrase (STPCA2-1)"
## [22] "Annotated: CarbonicAnhyrase"
## [23] "spore wall protein 2-like isoform X3 [Stylophora pistillata]"
## [24] "L-type calcium channel alpha-1 subunit"
## [25] "Annotated: carbonic anhydrase (STPCA2-2)"
## [26] "MAM and LDLr domain-containing protein"
## [27] "MAM and LDLr domain-containing protein"
## [28] "Annotated: MAM and LDL receptor-containing protein (MAM LDL-2)"
## [29] "MAM and LDL-receptor domain- containing protein 2"
## [30] "MAM and LDL-receptor domain- containing protein 1"
## [31] "MAM domain anchor protein"
## [32] "MAM/LDL receptor domain containing protein"
## [33] "Zonadhesion-like precursor"
## [34] "MAM and LDL-receptor class A domain-containing protein 2-like [Stylophora pistillata]"
## [35] "MAGUK p55 subfamily member 7-like [Stylophora pistillata]"
## [36] "uncharacterized protein LOC111344812 [Stylophora pistillata]"
## [37] "SLIT-ROBO Rho GTPase-activating protein 1-like [Stylophora pistillata]"
## [38] "Late embryogenesis protein"
## [39] "EGF and laminin G domain-containing protein"
## [40] "EGF and laminin G domain-containing protein"
## [41] "Laminin G domain-containing protein"
## [42] "EGF and laminin G domain-containing protein"
## [43] "Annotated: EGF and LamininG-Like (EGF LamG2)"
## [44] "Annotated: EGF and LamininG-Like (EGF LamG1)"
## [45] "EGF and laminin G domain-containing protein"
## [46] "Contactin-associated protein"
## [47] "Neurexin"
## [48] "EGF and laminin G domain-containing protein-like [Stylophora pistillata]"
## [49] "low-density lipoprotein receptor-related protein 8-like [Stylophora pistillata]"
## [50] "TSP-1 and VWA domain-containing"
## [51] "Annotated: Thrombospondin-like protein (Thrombospondin)"
## [52] "Annotated: Coadhesin"
## [53] "clone g9951 alpha collagen-like protein gene"
## [54] "Thrombospondin"
## [55] "Hemicentin"
## [56] "coadhesin-like isoform X3 [Stylophora pistillata]"
## [57] "Integrin - alpha"
## [58] "hypothetical protein AWC38_SpisGene4292 [Stylophora pistillata]"
## [59] "von Willebrand factor D and EGF domain-containing protein-like, partial [Stylophora pistillata]"
## [60] "digestive cysteine proteinase 1-like [Stylophora pistillata]"
## [61] "Cystein-rich"
## [62] "Uncharacterized skeletal organic matrix protein-2 (USOMP-2)"
## [63] "polycystic kidney disease 1-related (PKD1-related) protein"
## [64] "polycystic kidney disease 1-related (PKD1-related) protein"
# Number of joined rows, i.e. Biomineralization entries matched by
# frontloaded genes.
length(Biomin_FRONTs$definition)
## [1] 64
# Distinct Pocillopora genes behind those rows; this vector drives the
# per-gene plotting loop and the CSV export.
Biomin_FRONTs_names <- unique(Biomin_FRONTs$Gene)
length(Biomin_FRONTs_names)
## [1] 29
# select() is namespace-qualified, matching the dplyr::select / dplyr::filter
# calls used elsewhere in this file, so a select() exported by another
# attached package cannot mask it.
Biomin_FRONTs %>%
  dplyr::select(Gene, `accessionnumber/geneID`, definition, Ref)
## Gene accessionnumber/geneID
## 1 Pocillopora_acuta_HIv2___RNAseq.g13823.t1 aug_v2a.09809.t1
## 2 Pocillopora_acuta_HIv2___RNAseq.g13823.t1 P13_g6918
## 3 Pocillopora_acuta_HIv2___RNAseq.g13823.t1 PFX18785.1
## 4 Pocillopora_acuta_HIv2___RNAseq.g27976.t1 AAR13013.1
## 5 Pocillopora_acuta_HIv2___RNAseq.g27566.t1 aug_v2a.24015.t1
## 6 Pocillopora_acuta_HIv2___RNAseq.g27566.t1 XP_022788227.1
## 7 Pocillopora_acuta_HIv2___RNAseq.g15280.t1 AJQ31790.1
## 8 Pocillopora_acuta_HIv2___RNAseq.g8821.t1 PFX26597.1
## 9 Pocillopora_acuta_HIv2___RNAseq.g14653.t1 P8_g9654
## 10 Pocillopora_acuta_HIv2___RNAseq.g14653.t1 XP_022786918.1
## 11 Pocillopora_acuta_HIv2___RNAseq.g16280.t1 AGE35225.2
## 12 Pocillopora_acuta_HIv2___RNAseq.g16280.t1 Gene:g1484
## 13 Pocillopora_acuta_HIv2___TS.g23724.t1a JR997000.1
## 14 Pocillopora_acuta_HIv2___RNAseq.g26037.t1 Gene:g5735.t1
## 15 Pocillopora_acuta_HIv2___RNAseq.g5013.t1 JR986059.1
## 16 Pocillopora_acuta_HIv2___TS.g11659.t1 XP_022809269.1
## 17 Pocillopora_acuta_HIv2___TS.g11659.t1 XP_022809270.1
## 18 Pocillopora_acuta_HIv2___RNAseq.g7402.t1 XP_022801463.1
## 19 Pocillopora_acuta_HIv2___TS.g12304.t1 ACE95141.1
## 20 Pocillopora_acuta_HIv2___TS.g12304.t1 EU532164.1
## 21 Pocillopora_acuta_HIv2___TS.g12304.t1 Gene:g29033.t1
## 22 Pocillopora_acuta_HIv2___TS.g12304.t1 Gene:g29034.t1
## 23 Pocillopora_acuta_HIv2___RNAseq.g5807.t1 XP_022803872.1
## 24 Pocillopora_acuta_HIv2___RNAseq.g21501.t1 AAD11470.1
## 25 Pocillopora_acuta_HIv2___RNAseq.g13824.t1 Gene:g27814
## 26 Pocillopora_acuta_HIv2___RNAseq.g25935.t1 aug_v2a.09968.t1
## 27 Pocillopora_acuta_HIv2___RNAseq.g25935.t1 aug_v2a.09969.t1
## 28 Pocillopora_acuta_HIv2___RNAseq.g25935.t1 Gene:g15955
## 29 Pocillopora_acuta_HIv2___RNAseq.g25935.t1 JR994474.1
## 30 Pocillopora_acuta_HIv2___RNAseq.g25935.t1 JT011118.1
## 31 Pocillopora_acuta_HIv2___RNAseq.g25935.t1 P20_g6066
## 32 Pocillopora_acuta_HIv2___RNAseq.g25935.t1 P34_g1714
## 33 Pocillopora_acuta_HIv2___RNAseq.g25935.t1 P36_g13890
## 34 Pocillopora_acuta_HIv2___RNAseq.g25935.t1 XP_022794736.1
## 35 Pocillopora_acuta_HIv2___RNAseq.g15517.t1 XP_022789932.1
## 36 Pocillopora_acuta_HIv2___RNAseq.g24861.t1b XP_022807807.1
## 37 Pocillopora_acuta_HIv2___RNAseq.g27376.t1 XP_022806928.1
## 38 Pocillopora_acuta_HIv2___RNAseq.g16715.t1 P28_g11651
## 39 Pocillopora_acuta_HIv2___RNAseq.g26221.t1 aug_v2a.06122.t1
## 40 Pocillopora_acuta_HIv2___RNAseq.g26221.t1 aug_v2a.06123.t1
## 41 Pocillopora_acuta_HIv2___RNAseq.g26221.t1 aug_v2a.15580.t1
## 42 Pocillopora_acuta_HIv2___RNAseq.g26221.t1 aug_v2a.24512.t1
## 43 Pocillopora_acuta_HIv2___RNAseq.g26221.t1 Gene:g34749
## 44 Pocillopora_acuta_HIv2___RNAseq.g26221.t1 Gene:g7086
## 45 Pocillopora_acuta_HIv2___RNAseq.g26221.t1 JR980881.1
## 46 Pocillopora_acuta_HIv2___RNAseq.g26221.t1 P19_g20041
## 47 Pocillopora_acuta_HIv2___RNAseq.g26221.t1 P31_g20420
## 48 Pocillopora_acuta_HIv2___RNAseq.g26221.t1 XP_022804012.1
## 49 Pocillopora_acuta_HIv2___RNAseq.g4085.t1 XP_022798902.1
## 50 Pocillopora_acuta_HIv2___RNAseq.g6446.t1 aug_v2a.05945.t1
## 51 Pocillopora_acuta_HIv2___RNAseq.g6446.t1 Gene:g2829
## 52 Pocillopora_acuta_HIv2___RNAseq.g6446.t1 Gene:g2829.t1
## 53 Pocillopora_acuta_HIv2___RNAseq.g6446.t1 P14_g9951
## 54 Pocillopora_acuta_HIv2___RNAseq.g6446.t1 P3_g12510
## 55 Pocillopora_acuta_HIv2___RNAseq.g6446.t1 P5_g11674
## 56 Pocillopora_acuta_HIv2___RNAseq.g6446.t1 XP_022783415.1
## 57 Pocillopora_acuta_HIv2___TS.g15792.t1 P27_g18472
## 58 Pocillopora_acuta_HIv2___TS.g15792.t1 PFX30903.1
## 59 Pocillopora_acuta_HIv2___RNAseq.g28226.t2 XP_022810585.1
## 60 Pocillopora_acuta_HIv2___RNAseq.g18103.t1 XP_022803524.1
## 61 Pocillopora_acuta_HIv2___TS.g1545.t1b aug_v2a.15064.t1
## 62 Pocillopora_acuta_HIv2___TS.g1545.t1b JR982706.1
## 63 Pocillopora_acuta_HIv2___RNAseq.g16433.t1 aug_v2a.02830
## 64 Pocillopora_acuta_HIv2___RNAseq.g16433.t1 aug_v2a.02830.t1
## definition
## 1 Mucin4-like protein
## 2 Sushi domain-containing
## 3 Mucin-4 [Stylophora pistillata]
## 4 plasma membrane calcium ATPase [Stylophora pistillata]
## 5 Hephaestin-like protein
## 6 hephaestin-like protein [Stylophora pistillata]
## 7 solute carrier family 4 member gamma [Stylophora pistillata]
## 8 Complement C3 [Stylophora pistillata]
## 9 Major yolk protein
## 10 major yolk protein-like isoform X2 [Stylophora pistillata]
## 11 CARP1 [Stylophora pistillata]
## 12 Annotated: CARP1
## 13 Uncharacterized skeletal organic matrix protein-3 (USOMP-3)
## 14 Annotated: Tolloid-Like
## 15 Cephalotoxin-like protein
## 16 microtubule-associated tumor suppressor 1 homolog isoform X1 [Stylophora pistillata]
## 17 microtubule-associated tumor suppressor 1 homolog isoform X2 [Stylophora pistillata]
## 18 sodium bicarbonate cotransporter 3-like isoform X2
## 19 carbonic anhydrase [Stylophora pistillata]
## 20 carbonic anhydrase 2
## 21 Annotated: Carbonic Anhydrase (STPCA2-1)
## 22 Annotated: CarbonicAnhyrase
## 23 spore wall protein 2-like isoform X3 [Stylophora pistillata]
## 24 L-type calcium channel alpha-1 subunit
## 25 Annotated: carbonic anhydrase (STPCA2-2)
## 26 MAM and LDLr domain-containing protein
## 27 MAM and LDLr domain-containing protein
## 28 Annotated: MAM and LDL receptor-containing protein (MAM LDL-2)
## 29 MAM and LDL-receptor domain- containing protein 2
## 30 MAM and LDL-receptor domain- containing protein 1
## 31 MAM domain anchor protein
## 32 MAM/LDL receptor domain containing protein
## 33 Zonadhesion-like precursor
## 34 MAM and LDL-receptor class A domain-containing protein 2-like [Stylophora pistillata]
## 35 MAGUK p55 subfamily member 7-like [Stylophora pistillata]
## 36 uncharacterized protein LOC111344812 [Stylophora pistillata]
## 37 SLIT-ROBO Rho GTPase-activating protein 1-like [Stylophora pistillata]
## 38 Late embryogenesis protein
## 39 EGF and laminin G domain-containing protein
## 40 EGF and laminin G domain-containing protein
## 41 Laminin G domain-containing protein
## 42 EGF and laminin G domain-containing protein
## 43 Annotated: EGF and LamininG-Like (EGF LamG2)
## 44 Annotated: EGF and LamininG-Like (EGF LamG1)
## 45 EGF and laminin G domain-containing protein
## 46 Contactin-associated protein
## 47 Neurexin
## 48 EGF and laminin G domain-containing protein-like [Stylophora pistillata]
## 49 low-density lipoprotein receptor-related protein 8-like [Stylophora pistillata]
## 50 TSP-1 and VWA domain-containing
## 51 Annotated: Thrombospondin-like protein (Thrombospondin)
## 52 Annotated: Coadhesin
## 53 clone g9951 alpha collagen-like protein gene
## 54 Thrombospondin
## 55 Hemicentin
## 56 coadhesin-like isoform X3 [Stylophora pistillata]
## 57 Integrin - alpha
## 58 hypothetical protein AWC38_SpisGene4292 [Stylophora pistillata]
## 59 von Willebrand factor D and EGF domain-containing protein-like, partial [Stylophora pistillata]
## 60 digestive cysteine proteinase 1-like [Stylophora pistillata]
## 61 Cystein-rich
## 62 Uncharacterized skeletal organic matrix protein-2 (USOMP-2)
## 63 polycystic kidney disease 1-related (PKD1-related) protein
## 64 polycystic kidney disease 1-related (PKD1-related) protein
## Ref
## 1 Takeuchi et al., 2016
## 2 Drake et al., 2013
## 3 Peled et al., 2020
## 4 Zoccola et al., 2004
## 5 Takeuchi et al., 2016
## 6 Peled et al., 2020
## 7 Zoccola et al., 2015
## 8 Peled et al., 2020
## 9 Drake et al., 2013
## 10 Peled et al., 2020
## 11 Mass et al., 2013
## 12 Mummadisetti et al., 2021
## 13 Ramos-Silva et al., 2013
## 14 Mummadisetti et al., 2021
## 15 Ramos-Silva et al., 2013
## 16 Peled et al., 2020
## 17 Peled et al., 2020
## 18 Zoccola et al., 2015
## 19 Moya et al., 2008
## 20 Bertucci et al., 2011
## 21 Mummadisetti et al., 2021
## 22 Mummadisetti et al., 2021
## 23 Peled et al., 2020
## 24 Zoccola et al., 1999
## 25 Mummadisetti et al., 2021
## 26 Takeuchi et al., 2016
## 27 Takeuchi et al., 2016
## 28 Mummadisetti et al., 2021
## 29 Ramos-Silva et al., 2013
## 30 Ramos-Silva et al., 2013
## 31 Drake et al., 2013
## 32 Drake et al., 2013
## 33 Drake et al., 2013
## 34 Peled et al., 2020
## 35 Peled et al., 2020
## 36 Peled et al., 2020
## 37 Peled et al., 2020
## 38 Drake et al., 2013
## 39 Takeuchi et al., 2016
## 40 Takeuchi et al., 2016
## 41 Takeuchi et al., 2016
## 42 Takeuchi et al., 2016
## 43 Mummadisetti et al., 2021
## 44 Mummadisetti et al., 2021
## 45 Ramos-Silva et al., 2013
## 46 Drake et al., 2013
## 47 Drake et al., 2013
## 48 Peled et al., 2020
## 49 Peled et al., 2020
## 50 Takeuchi et al., 2016
## 51 Mummadisetti et al., 2021
## 52 Mummadisetti et al., 2021
## 53 Drake et al., 2013
## 54 Drake et al., 2013
## 55 Drake et al., 2013
## 56 Peled et al., 2020
## 57 Drake et al., 2013
## 58 Peled et al., 2020
## 59 Peled et al., 2020
## 60 Peled et al., 2020
## 61 Takeuchi et al., 2016
## 62 Ramos-Silva et al., 2013
## 63 Takeuchi et al., 2016
## 64 Takeuchi et al., 2016
64/172 of the Biomineralization Genes are represented in the Frontloaded genes
This is 29 genes, some of which are mapping to multiple Biomineralization genes, out of the 65 that are matching to Biomineralization Genes (29/65)
# Draw one Factor_ggmodelPlot() figure per unique frontloaded gene that
# matched a Biomineralization gene. The explicit print() is needed because
# values are not auto-printed inside a for loop.
for (i in Biomin_FRONTs_names) {
  print(Factor_ggmodelPlot(
    results,
    geneName = i,
    x1var = "Treatment",
    x2var = "Origin",
    addBox = TRUE, # TRUE rather than T: T is an ordinary, reassignable variable
    xlab = "Treatment and Origin",
    title = i,
    colours = plotColours,
    lineColours = plotColours,
    modelColours = modColours,
    modelSize = 3
  ))
}
# Read the per-gene table used for the frontloading scatter plot and drop its
# `X` column (presumably a saved row index -- confirm against the step that
# wrote the file).
READY <- dplyr::select(
  read.csv(
    file = "../../../output/Slope_Base/frontloaded_genes_plotting.csv",
    sep = ",",
    header = TRUE
  ),
  !c("X")
)

# Point colour per gene. "Frontloaded" here means yall > 1 and xall_1 < 1.
#   pink  : frontloaded and a Biomineralization best hit
#   red   : Biomineralization best hit, not frontloaded
#   black : frontloaded, not a Biomineralization best hit
#   gray  : everything else
# The first matching rule wins, which reproduces the result of assigning
# gray, then black, then red, then pink in sequence.
READY <- READY %>%
  dplyr::mutate(
    color = dplyr::case_when(
      yall > 1 & xall_1 < 1 &
        Gene %in% merged_data$Pocillopora_acuta_best_hit ~ "pink",
      Gene %in% merged_data$Pocillopora_acuta_best_hit ~ "red",
      yall > 1 & xall_1 < 1 ~ "black",
      TRUE ~ "gray"
    )
  )

# Restrict the plot to genes with both ratios below 6.
READY_cutoff <- dplyr::filter(READY, yall < 6, xall_1 < 6)

# Points are drawn in three passes so the Biomineralization genes end up on
# top: everything that is not red first, then red, then pink again.
P <- ggplot(READY_cutoff, aes(x = xall_1, y = yall)) +
  # Single-pass alternative, kept for reference:
  # geom_point(colour = READY_cutoff$color, alpha=0.8) +
  geom_point(
    data = subset(READY_cutoff, color != "red"),
    colour = READY_cutoff$color[READY_cutoff$color != "red"],
    alpha = 0.8
  ) +
  geom_point(
    data = subset(READY_cutoff, color == "red"),
    colour = READY_cutoff$color[READY_cutoff$color == "red"],
    alpha = 0.8
  ) +
  geom_point(
    data = subset(READY_cutoff, color == "pink"),
    colour = READY_cutoff$color[READY_cutoff$color == "pink"],
    alpha = 1
  ) +
  theme_classic() +
  stat_smooth(method = "lm", formula = y ~ x + poly(x, 2) - 1) +
  # Dotted guides at ratio = 1 on both axes.
  geom_vline(xintercept = 1, linetype = "dotted") +
  geom_hline(yintercept = 1, linetype = "dotted") +
  labs(
    y = "Flat to Slope (Conditioned to naive) control ratio",
    x = "Flat to Slope (Conditioned to naive) foldchange ratio",
    title = "Frontloaded genes"
  ) +
  scale_x_continuous(limits = c(0, 6.1), expand = c(0, 0)) +
  scale_y_continuous(limits = c(0, 6.1), expand = c(0, 0)) +
  # Shade the x < 1 strip: lighter above y = 1 (the frontloaded region),
  # darker below it.
  annotate("rect", xmin = 0, xmax = 1, ymin = 1, ymax = 6.1, alpha = .2) +
  annotate("rect", xmin = 0, xmax = 1, ymin = 0, ymax = 1, alpha = .5)
P
Output lists of frontloaded genes with Biomineralization gene info
# Every merged_data row whose best hit is one of the frontloaded genes that
# matched a Biomineralization gene. filter() is namespace-qualified (as it is
# elsewhere in this file) so stats::filter or another package's filter()
# cannot be picked up, and the column is referenced through the data mask
# instead of re-indexing merged_data with `$` inside the pipe.
Biomin_FRONTs_info <- merged_data %>%
  dplyr::filter(Pocillopora_acuta_best_hit %in% Biomin_FRONTs_names)

# NOTE(review): this is a user-specific home-directory path, whereas the
# inputs in this file are read via relative "../../../output/..." paths;
# consider making it relative as well.
write.csv(
  Biomin_FRONTs_info,
  "~/Documents/URI/Heron-Pdam-gene-expression/BioInf/output/Biomin_frontloaded.csv",
  row.names = FALSE # FALSE rather than F: F is an ordinary, reassignable variable
)